library(tidyverse)
library(lubridate) # work with dates
library(magrittr) # more pipes
library(broom) # tidy regression outputs
library(fixest) # fixest::feols() is very fast with big data and fixed effects
library(patchwork) # for combining plots
library(ggthemes) # for ggthemes
library(knitr) # for making regression tables in Latex

shift <- data.table::shift # for lagging/leading variables

#### Load in and tidy data ####
mp_df <- readRDS("data/analysis/mp_df.rds")

# Load in relevant MP-level covariates
mp_covariates <- read_csv("data/analysis/mp_hte_covariates.csv")

# Merge
mp_hte <- left_join(mp_df, mp_covariates, by = "full_name")


#### Heterogeneous effects models ####

## Heterogeneous effects by party: Labour/Conservative

hte_labcon_tweets <- feols(sum_ctweets ~ sum_tweets + frontbench +
                             fff_events_cumulative1 * lab1_con0  |
                             full_name + year_week,
                           data = mp_df)

hte_labcon_speeches <- feols(sum_cspchs ~ sum_spchs + frontbench +
                               fff_events_cumulative1 * lab1_con0  |
                               full_name + year_week,
                             data = mp_df)

## Heterogeneous effects for the All-Parliament Group on Climate Change

hte_apg_tweets <- feols(sum_ctweets ~ sum_tweets + frontbench +
                          fff_events_cumulative1 * apg_climate_190731 | 
                          full_name + year_week,
                        data = mp_hte)

hte_apg_speeches <- feols(sum_cspchs ~ sum_spchs + frontbench +
                          fff_events_cumulative1 * apg_climate_190731 | 
                          full_name + year_week,
                        data = mp_hte)

## Heterogeneous effects for the Conservative Environment Network MPs v. Tories

hte_cen_tweets <- feols(sum_ctweets ~ sum_tweets + frontbench +
                          fff_events_cumulative1 * cen_caucus_191213  |
                          full_name + year_week,
                        data = mp_hte %>% filter(party_value == "Conservative"))

hte_cen_speeches <- feols(sum_cspchs ~ sum_spchs + frontbench +
                            fff_events_cumulative1 * cen_caucus_191213  |
                            full_name + year_week,
                          data = mp_hte %>% filter(party_value == "Conservative"))

## Heterogeneous effects for the Net Zero Scrutiny Group v. Tories

hte_nzsg_tweets <- feols(sum_ctweets ~ sum_tweets + frontbench +
                           fff_events_cumulative1 * netzero_scrutiny |
                           full_name + year_week,
                         data = mp_hte %>% filter(party_value == "Conservative"))

hte_nzsg_speeches <- feols(sum_cspchs ~ sum_spchs + frontbench +
                             fff_events_cumulative1 * netzero_scrutiny |
                             full_name + year_week,
                           data = mp_hte %>% filter(party_value == "Conservative"))

## Frontbench

hte_frontbench_tweets <- feols(sum_ctweets ~ sum_tweets + 
                                 frontbench * fff_events_cumulative1  |
                                 full_name + year_week,
                               data = filter(mp_df, !is.na(lab1_con0)))
hte_frontbench_speeches <- feols(sum_cspchs ~ sum_spchs + 
                                   frontbench * fff_events_cumulative1  |
                                   full_name + year_week,
                                 data = filter(mp_df, !is.na(lab1_con0)))

## Combine HTE tweets models into table
table_hte_tweets <- bind_rows(hte_apg_tweets %>% tidy() %>% mutate(model = 1),
                              hte_labcon_tweets %>% tidy() %>% mutate(model = 2),
                              hte_cen_tweets %>% tidy() %>% mutate(model = 3),
                              hte_nzsg_tweets %>% tidy() %>% mutate(model = 4),
                              hte_frontbench_tweets %>% tidy() %>% mutate(model = 5)) %>%
  set_colnames(c("term", "estimate", "std_error", "t_statistic", "p_value", "model")) %>%
  filter(str_detect(term, "fff")) %>%
  mutate_at(vars(estimate:std_error), ~format(round(., digits = 3),
                                              nsmall = 3)) %>%
  mutate_all(~as.character(.)) %>%
  pivot_longer(names_to = "param", values_to = "value",
               estimate:p_value) %>%
  mutate(value = ifelse(param == "std_error",
                        paste("(", value, ")", sep = ""), value)) %>%
  group_by(model, term) %>% 
  mutate(p_value = as.numeric(max(ifelse(param=="p_value", value, 0)))) %>% 
  mutate(value = ifelse(param == "estimate" & (p_value < 0.05 & p_value > 0.01),
                        paste(value, "*", sep = ""),
                        ifelse(param == "estimate" & (p_value < 0.01),
                               paste(value, "**", sep = ""),
                               value))) %>% 
  select(-p_value) %>% 
  ungroup() %>% 
  filter(param != "t_statistic",
         param != "p_value") %>%
  pivot_wider(names_from = "model", values_from = "value",
              names_prefix = "H") %>%
  add_row(term = "Fixed effect", param = "fe1",
          H1 = "MP",
          H2 = "MP",
          H3 = "MP",
          H4 = "MP",
          H5 = "MP") %>%
  add_row(term = "Fixed effect", param = "fe2",
          H1 = "Year--week",
          H2 = "Year--week",
          H3 = "Year--week",
          H4 = "Year--week",
          H5 = "Year--week") %>%
  add_row(term = "nobs", param = "nobs",
          H1 = as.character(hte_apg_tweets$nobs),
          H2 = as.character(hte_labcon_tweets$nobs),
          H3 = as.character(hte_cen_tweets$nobs),
          H4 = as.character(hte_nzsg_tweets$nobs),
          H5 = as.character(hte_frontbench_tweets$nobs)) %>%
  mutate(term = case_when(term == "fff_events_cumulative1" ~ "FFF",
                          term == "fff_events_cumulative1:lab1_con0" ~ "FFF times Labour",
                          term == "fff_events_cumulative1:cen_caucus_191213" ~ "FFF times CEN",
                          term == "fff_events_cumulative1:netzero_scrutiny" ~ "FFF times NZSG",
                          term == "fff_events_cumulative1:apg_climate_190731" ~ "FFF times APG",
                          term == "frontbench:fff_events_cumulative1" ~ "FFF times Frontbench",
                          param == "fe1" ~ "Unit fixed effect",
                          param == "fe2" ~ "Time fixed effect",
                          param == "nobs" ~ "Observations")) %>%
  mutate(term = ifelse(param == "std_error", "", term)) %>% 
  select(-param) %>% 
  # select(-param, `APP-H1` = H1, `APP-H2` = H2, `APP-H3` = H3, `APP-H4` = H4) %>% 
  mutate_at(vars(2:6), ~ifelse(is.na(.), "", .)) %>% 
  add_row(term = "Covariates", 
          H1 = "Yes",
          H2 = "Yes",
          H3 = "Yes",
          H4 = "Yes",
          H5 = "Yes") %>%
  {. ->> int_hte_tweets } %>%
  # filter(term!="") %>% # Remove the standard error term
  rbind(., c("Outcome", rep("Tweets", times = 5))) %>% 
  knitr::kable(.,
               format = "simple",
               booktabs = T,
               linesep = "")


## Combine HTE speech models into table
table_hte_speeches <- bind_rows(hte_apg_speeches %>% tidy() %>% mutate(model = 6),
                                hte_labcon_speeches %>% tidy() %>% mutate(model = 7),
                              hte_cen_speeches %>% tidy() %>% mutate(model = 8),
                              hte_nzsg_speeches %>% tidy() %>% mutate(model = 9),
                              hte_frontbench_speeches %>% tidy() %>% mutate(model = 10)) %>%
  set_colnames(c("term", "estimate", "std_error", "t_statistic", "p_value", "model")) %>%
  filter(str_detect(term, "fff")) %>%
  mutate_at(vars(estimate:std_error), ~format(round(., digits = 3),
                                              nsmall = 3)) %>%
  mutate_all(~as.character(.)) %>%
  pivot_longer(names_to = "param", values_to = "value",
               estimate:p_value) %>%
  mutate(value = ifelse(param == "std_error",
                        paste("(", value, ")", sep = ""), value)) %>%
  group_by(model, term) %>% 
  mutate(p_value = as.numeric(max(ifelse(param=="p_value", value, 0)))) %>% 
  mutate(value = ifelse(param == "estimate" & (p_value < 0.05 & p_value > 0.01),
                        paste(value, "*", sep = ""),
                        ifelse(param == "estimate" & (p_value < 0.01),
                               paste(value, "**", sep = ""),
                               value))) %>% 
  select(-p_value) %>% 
  ungroup() %>% 
  filter(param != "t_statistic",
         param != "p_value") %>%
  pivot_wider(names_from = "model", values_from = "value",
              names_prefix = "H") %>%
  add_row(term = "Fixed effect", param = "fe1",
          H6 = "MP",
          H7 = "MP",
          H8 = "MP",
          H9 = "MP",
          H10 = "MP") %>%
  add_row(term = "Fixed effect", param = "fe2",
          H6 = "Year--week",
          H7 = "Year--week",
          H8 = "Year--week",
          H9 = "Year--week",
          H10 = "Year--week") %>%
  add_row(term = "nobs", param = "nobs",
          H6 = as.character(hte_apg_speeches$nobs),
          H7 = as.character(hte_labcon_speeches$nobs),
          H8 = as.character(hte_cen_speeches$nobs),
          H9 = as.character(hte_nzsg_speeches$nobs),
          H10 = as.character(hte_frontbench_speeches$nobs)) %>%
  mutate(term = case_when(term == "fff_events_cumulative1" ~ "FFF",
                          term == "fff_events_cumulative1:lab1_con0" ~ "FFF times Labour",
                          term == "fff_events_cumulative1:cen_caucus_191213" ~ "FFF times CEN",
                          term == "fff_events_cumulative1:netzero_scrutiny" ~ "FFF times NZSG",
                          term == "fff_events_cumulative1:apg_climate_190731" ~ "FFF times APG",
                          term == "frontbench:fff_events_cumulative1" ~ "FFF times Frontbench",
                          param == "fe1" ~ "Unit fixed effect",
                          param == "fe2" ~ "Time fixed effect",
                          param == "nobs" ~ "Observations")) %>%
  mutate(term = ifelse(param == "std_error", "", term)) %>% 
  select(-param) %>% 
  mutate_at(vars(2:6), ~ifelse(is.na(.), "", .)) %>% 
  add_row(term = "Covariates", 
          H6 = "Yes",
          H7 = "Yes",
          H8 = "Yes",
          H9 = "Yes",
          H10 = "Yes") %>%
  {. ->> int_hte_speeches } %>%
  # filter(term!="") %>% # Remove the standard error term
  rbind(., c("Outcome", rep("Speeches", times = 5))) %>% 
  knitr::kable(.,
               format = "simple",
               booktabs = T,
               linesep = "")


## Combining tweets and speeches in one table
table_hte_combined <- cbind(int_hte_tweets[,1:2], int_hte_speeches[,2],
      int_hte_tweets[,3], int_hte_speeches[,3],
      int_hte_tweets[,4], int_hte_speeches[,4],
      int_hte_tweets[,5], int_hte_speeches[,5],
      int_hte_tweets[,6], int_hte_speeches[,6]) %>% 
  rbind(., c("Outcome", rep(c("Tweets", "Speeches"), times = 5))) %>% 
  set_colnames(value = c("term", paste("M", seq(from = 13, to = 22, by = 1), sep = ""))) %>% 
  knitr::kable(.,
               format = "latex",
               booktabs = T,
               linesep = "")
table_hte_combined
